# -*- coding: utf-8 -*-
import scrapy
from urllib import parse
from .. import items

class MovieSpiderSpider(scrapy.Spider):
    """Spider for dytt (www.ygdy8.net) latest-movie listing pages.

    Crawls the first ``PAGE_COUNT`` pages of the "latest movies" list,
    follows each movie's detail-page link and yields a
    ``FirstScrapyItem`` carrying the movie title and its download URL.
    """

    name = 'movie_spider'
    allowed_domains = ['www.ygdy8.net']  # restrict the crawl to the movie site

    # Number of listing pages to crawl; the URLs differ only in the page
    # index, so they are generated instead of hand-written.
    PAGE_COUNT = 5
    start_urls = [
        'http://www.ygdy8.net/html/gndy/dyzz/list_23_%d.html' % page
        for page in range(1, PAGE_COUNT + 1)
    ]

    def parse(self, response):
        """Parse one listing page and schedule every movie detail page.

        Yields a ``scrapy.Request`` per movie row, handled by
        :meth:`parse_detail`.
        """
        # One <table> per movie entry in the listing.
        table_list = response.xpath('//*[@class="co_content8"]/ul/td/table')
        for table in table_list:
            # extract_first() returns None instead of raising IndexError
            # when a row has no detail link (layout drift, ad rows).
            url = table.xpath('./tr[2]/td[2]/b/a/@href').extract_first()
            if url:
                # Listing hrefs are relative; resolve against the page URL.
                yield scrapy.Request(parse.urljoin(response.url, url),
                                     callback=self.parse_detail)

    def parse_detail(self, response):
        """Parse a movie detail page into a ``FirstScrapyItem``.

        Yields nothing when the expected title/download-url nodes are
        missing (page layout changed), instead of crashing the crawl.
        """
        content = response.xpath('//*[@class="bd3r"]')
        title = content.xpath(
            './div[@class="co_area2"]/div[@class="title_all"]'
            '/h1/font/text()').extract_first()
        download_url = content.xpath(
            './div[@class="co_area2"]/div[@class="co_content8"]/ul/tr[3]'
            '/td[1]/div[@align="left"]/div[@id="Zoom"]'
            '/td/table/tbody/tr/td/a/@href').extract_first()
        if title is None or download_url is None:
            # Malformed or redesigned page — skip this entry.
            return
        item = items.FirstScrapyItem()
        item['title'] = title
        item['download_url'] = download_url
        yield item